#### The Populist Label: Replication Code for Paper Figures ###

### ------------------ Libraries ------------------ ###

library(dplyr)
library(ggplot2)
library(fixest)
library(broom)
library(modelsummary)
library(tibble)
library(reticulate)
library(tidyverse)
library(stringr)
library(readr)

### ------------------ Working directory ------------------ ###

# All input files below are read relative to the working directory.
# A bare `setwd()` stops the script with a missing-argument error, so the
# placeholder is kept as a comment: uncomment it and fill in the folder.
# setwd("path/to/replication/folder") # Complete with user's WD

### ------------------ Datasets ------------------ ###

# Funke, Schularick, and Trebesch (2023) dataset.
# read_dta() is provided by haven, which is not attached in the library block
# above, so it is called through its namespace.
data <- haven::read_dta("ple_dataset.dta")

pop_hist <- read.csv("pop_hist.csv") # contains all populist history edits data

pop_hist_content <- read.csv("pop_hist_pages.csv") # contains populist content pages and sentiment scores

all_edits_scores_content <- readRDS("all_edits_scores_content.rds") # contains populist and non-populist pages and sentiment scores


### ------------------ Figures - Descriptives ------------------ ###

### Figure 1: Populist Leaders over Time - Reproduced from Funke, Schularick, and Trebesch (2023)

# Per-year share (in %) of rows flagged as populist, right-wing populist and
# left-wing populist
df_summary <- data %>%
  group_by(year) %>%
  summarize(
    share_populist = sum(pop, na.rm = TRUE) / n() * 100,
    share_right_wing_populist = sum(rpop, na.rm = TRUE) / n() * 100,
    share_left_wing_populist = sum(lpop, na.rm = TRUE) / n() * 100
  ) %>%
  # Whenever (total - right-wing) is non-negative, use it as the left-wing
  # share so the two stacked areas add up to the overall populist line
  mutate(
    residual_share = share_populist - share_right_wing_populist,
    share_left_wing_populist = case_when(
      residual_share >= 0 ~ residual_share,
      TRUE ~ share_left_wing_populist
    )
  ) %>%
  select(-residual_share)

# Column name -> legend label for the two stacked series (in stacking order)
populism_types <- c(
  share_right_wing_populist = "Right-wing populism",
  share_left_wing_populist = "Left-wing populism"
)

# One row per year and populism type, as required by geom_area()
df_long <- df_summary %>%
  pivot_longer(
    cols = all_of(names(populism_types)),
    names_to = "type",
    values_to = "share"
  ) %>%
  mutate(
    type = factor(
      type,
      levels = names(populism_types),
      labels = unname(populism_types)
    )
  )

# Stacked areas by type with the overall populist share drawn on top
ggplot(mapping = aes(x = year)) +
  geom_area(
    data = df_long,
    aes(y = share, fill = type),
    position = "stack",
    alpha = 0.8
  ) +
  geom_line(
    data = df_summary,
    aes(y = share_populist),
    color = "red",
    size = 1
  ) +
  scale_fill_manual(values = c("darkgray", "lightgray")) +
  xlim(1900, 2020) +
  ylim(0, 25) +
  labs(
    x = NULL,
    y = "Share of independent countries with populist government (%)",
    fill = NULL
  ) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 13),
    legend.text = element_text(size = 14),
    legend.title = element_blank(),
    legend.position = "bottom"
  )

### Figure 2: Mentions of Populism Terms Over Time in Wikipedia Heads of Government Pages

# Populism-related terms, combined into a single alternation pattern
populism_terms <- c("populism", "populist", "populists", "populistic")
populism_pattern <- paste(populism_terms, collapse = "|")

# Count case-insensitive matches per edit, then average per distinct leader
# page within each Year x Populist cell.
# year() is called as lubridate::year() because lubridate is not attached in
# the library block (only tidyverse >= 2.0 attaches it implicitly).
timeline_data <- all_edits_scores_content %>%
  mutate(
    Year = lubridate::year(Date),
    Mentions = str_count(Content, regex(populism_pattern, ignore_case = TRUE))
  ) %>%
  group_by(Year, Populist) %>%
  summarize(
    TotalMentions = sum(Mentions, na.rm = TRUE),
    TotalPages = n_distinct(Leader),
    MentionsPerPage = TotalMentions / TotalPages,
    .groups = "drop" # return an ungrouped table without the regrouping message
  )

# Create the plot
populism_timeline_plot <- ggplot(
  timeline_data,
  aes(x = Year, y = MentionsPerPage, color = Populist)
) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Non-Populist" = "steelblue", "Populist" = "firebrick")) +
  labs(
    #title = "Mentions of Populism Terms Over Time",
    #subtitle = "Average mentions per page for Populist and Non-Populist leaders",
    x = "Year",
    y = "Average Mentions per Page",
    color = "Leader Type"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Display the plot
print(populism_timeline_plot)

# Save the plot (passed explicitly rather than relying on last_plot())
ggsave("populism_mentions_timeline.png", populism_timeline_plot, width = 12, height = 8)


### Figure 3: First Mention and First Association with Populism on Wikipedia and The Guardian

# Wiki data preparation
# NOTE(review): this re-read replaces the `pop_hist` loaded from "pop_hist.csv"
# in the Datasets section, so the later sections that use `pop_hist` work on
# this file -- confirm that this is intended.
pop_hist <- read.csv("data/wikipedia_edit_histories_complete.csv")

# Earliest non-missing date in `dates`, or NA when there is none.
# min() on an empty Date vector returns Inf with a warning, and is.na() does
# not flag that value, so the empty case is handled explicitly.
first_date <- function(dates) {
  dates <- dates[!is.na(dates)]
  if (length(dates) == 0) {
    return(as.Date(NA))
  }
  min(dates)
}

# One row per leader and event, in the same long layout as the Guardian data
# (Leader, Event, Date, Source) so the two sources can be stacked.
wiki_data <- pop_hist %>%
  mutate(Date = as.Date(Date)) %>%
  group_by(Leader) %>%
  summarize(
    Wiki_First_Mention = first_date(Date),
    # which() drops rows where Content is NA instead of indexing with NA
    Wiki_First_Populism = first_date(Date[which(str_detect(Content, "populis"))]),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(Wiki_First_Mention, Wiki_First_Populism),
    names_to = "Event",
    values_to = "Date"
  ) %>%
  mutate(
    Source = "Wikipedia",
    Event = case_when(
      Event == "Wiki_First_Mention" ~ "First Mention",
      Event == "Wiki_First_Populism" ~ "First Populism Association"
    )
  ) %>%
  select(Leader, Event, Date, Source)

# Guardian data preparation: rename, reshape to long format, parse dates
guardian_data <- read.csv("leader_mentions_detailed.csv") %>%
  rename(
    Leader = leader,
    Guardian_First_Mention = first_mention,
    Guardian_First_Populism = first_populism_mention
  ) %>%
  pivot_longer(
    cols = c(Guardian_First_Mention, Guardian_First_Populism),
    names_to = "Event",
    values_to = "Date"
  ) %>%
  mutate(
    Source = "Guardian",
    Event = case_when(
      Event == "Guardian_First_Mention" ~ "First Mention",
      Event == "Guardian_First_Populism" ~ "First Populism Association"
    ),
    # Empty CSV cells are read as "", which as.Date() cannot parse
    Date = as.Date(na_if(as.character(Date), ""))
  ) %>%
  select(Leader, Event, Date, Source)

# Combine the data and drop events that never occurred
plot_data <- bind_rows(wiki_data, guardian_data) %>%
  mutate(
    Source = factor(Source, levels = c("Wikipedia", "Guardian")),
    Event = factor(Event, levels = c("First Mention", "First Populism Association"))
  ) %>%
  filter(!is.na(Date))

# Order leaders by earliest mention
plot_data <- plot_data %>%
  group_by(Leader) %>%
  mutate(earliest_date = min(Date, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(Leader = forcats::fct_reorder(Leader, earliest_date))

# Check the structure
print(head(plot_data))

# One line per leader and source joining its two events
final_plot <- ggplot(plot_data, aes(x = Date, y = Leader)) +
  geom_line(aes(group = interaction(Leader, Source), color = Source), size = 1) +
  geom_point(aes(color = Source, shape = Event), size = 3) +
  scale_color_manual(values = c("Wikipedia" = "blue", "Guardian" = "red")) +
  scale_shape_manual(values = c("First Mention" = 16, "First Populism Association" = 17)) +
  labs(title = "",
       x = "",
       y = "",
       color = "Source",
       shape = "Event") +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8),
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y")

# ggsave() does not create missing folders
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
ggsave("plots/first_mention_wiki_guardian.pdf", final_plot, width = 6, height = 7)


### Figure 4: Sentiment Scores Over Time by Leader

# Parse the edit date, then keep only rows with a known populism flag
pop_hist2 <- pop_hist %>%
  mutate(Date = as.Date(Date)) %>%
  filter(!is.na(populism))

# One panel per leader; each point is an edit, showing the populist-sentence
# score when populism == 1 and the non-populist score otherwise
ggplot(pop_hist2) +
  geom_point(
    aes(
      x = Date,
      y = ifelse(populism == 1, pop_sent_compound, non_pop_sent_compound),
      color = factor(populism)
    ),
    size = 1
  ) +
  facet_wrap(~ Leader, ncol = 4, scales = "free_x") +
  scale_color_manual(values = c("0" = "blue", "1" = "red")) +
  labs(
    #title = "Sentiment Scores Over Time by Leader",
    x = "Date",
    y = "Sentiment Score",
    color = "Populism Mention"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 6))


### ------------------ EVENT STUDY (Table 3, Figure 5, Figure 6) ------------------ ###


### Figure 5: Event study of how sentiments change after first mention of populism

# Event-time bin labels (in order) and the x position each bin gets in the plot
time_bin_labels <- c(
  "More than 3 years before", "2-3 years before", "1-2 years before",
  "0-1 year before", "0-1 year after",
  "1-2 years after", "2-3 years after",
  "More than 3 years after"
)
time_bin_positions <- setNames(c(-3, -2, -1, 0, 1, 2, 3, 4), time_bin_labels)

# First year in which `flagged == 1`, or NA when a leader is never flagged.
# Avoids the Inf (and warning) that min() returns on an empty selection.
first_event_year <- function(year, flagged) {
  event_years <- year[which(flagged == 1)]
  event_years <- event_years[!is.na(event_years)]
  if (length(event_years) == 0) {
    return(NA_real_)
  }
  as.numeric(min(event_years))
}

# Years relative to each leader's first populism mention, binned.
# Leaders with no mention get NA in TimeBin.
# NOTE(review): cut() intervals are right-closed by default, so if Year is a
# whole number the year of the first mention itself (YearsToEvent == 0) falls
# in "0-1 year before", i.e. the reference bin that the plot labels
# "Last Year Before Mention" -- confirm this is intended.
populist_data <- pop_hist %>%
  group_by(Leader) %>%
  mutate(
    first_populist_mention = first_event_year(Year, populism),
    YearsToEvent = Year - first_populist_mention,
    TimeBin = cut(YearsToEvent,
                  breaks = c(-Inf, -3, -2, -1, 0, 1, 2, 3, Inf),
                  labels = time_bin_labels)
  ) %>%
  ungroup()

# Check the distribution of TimeBin
table(populist_data$TimeBin, useNA = "ifany")

# Sentiment on event-time dummies (reference: "0-1 year before") with leader
# and year fixed effects
event_study_reg <- feols(sentiment ~ i(TimeBin, ref = "0-1 year before") | Leader + Year,
                         data = populist_data)

summary(event_study_reg)

# Extract the coefficients and map each bin to its numeric x position
coef_data <- tidy(event_study_reg) %>%
  filter(grepl("TimeBin", term)) %>%
  mutate(
    term = gsub("TimeBin::", "", term),
    term = unname(time_bin_positions[term])
  ) %>%
  arrange(term)

# Show the structure of coef_data (str() prints by itself and returns NULL)
str(coef_data)

# Add the omitted reference category (0-1 year before) as an exact zero.
# Built from scratch so it does not inherit another row's statistic/p-value.
reference_row <- tibble(term = 0, estimate = 0, std.error = 0)

coef_data <- bind_rows(reference_row, coef_data)

# Create the plot - FIGURE 5
event_study_plot <- ggplot(coef_data, aes(x = term, y = estimate)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = estimate - 1.96 * std.error,
                    ymax = estimate + 1.96 * std.error),
                width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  scale_x_continuous(breaks = c(-3, -2, -1, 0, 1, 2, 3, 4),
                     labels = c("-3 years", "-2 years", "-1 year", "Last Year Before Mention", "First Year With Mention", "+2 years", "+3 years", "Period Beyond")) +
  # Zoom with coord_cartesian(): scale limits would silently drop any point
  # or error bar reaching outside the window
  coord_cartesian(xlim = c(-3.5, 4.5), ylim = c(-1, 1)) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(color = "gray90"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),  # Remove x-axis title
    axis.title.y = element_blank()   # Remove y-axis title
  )

# Display the plot
print(event_study_plot)

# Save the plot
ggsave("event_study_plot_updated.pdf", event_study_plot, width = 10, height = 6)


### Figure 6: Sentiment Analyses of Populist Pages in Wikipedia - Populism Label and Ideology

# Keep edits with a known ideology (drops NA and any other value) and fix the
# factor level order
pop_hist_ideo <- pop_hist %>%
  filter(hog_ideology %in% c("rightist", "centrist", "leftist")) %>%
  mutate(ideology = factor(hog_ideology, levels = c("rightist", "centrist", "leftist")))

# Event-time bin labels, in order
term_levels <- c("More than 3 years before", "2-3 years before", "1-2 years before",
                 "0-1 year before", "0-1 year after",
                 "1-2 years after", "2-3 years after",
                 "More than 3 years after")

# Create event study variables
# NOTE(review): bins are right-closed, so with whole-number years the year of
# the first mention (YearsToEvent == 0) is part of the "0-1 year before"
# reference bin -- confirm this is intended.
populist_data <- pop_hist_ideo %>%
  group_by(Leader) %>%
  mutate(
    first_populist_mention = min(Year[populism == 1], na.rm = TRUE),
    YearsToEvent = Year - first_populist_mention,
    YearsToEvent = ifelse(is.infinite(YearsToEvent), NA, YearsToEvent),  # Replace Inf with NA
    TimeBin = cut(YearsToEvent,
                  breaks = c(-Inf, -3, -2, -1, 0, 1, 2, 3, Inf),
                  labels = term_levels,
                  include.lowest = TRUE)
  ) %>%
  ungroup()

# Run the event study regression with ideology interaction
event_study_reg_ideology <- feols(sentiment ~ i(TimeBin, ideology, ref = "0-1 year before") | Leader + Year,
                                  data = populist_data)

# Extract coefficients and prepare for plotting.
# The case_when() that originally mapped these terms expects names of the form
# "<bin>:ideology::<level>" once "TimeBin::" is removed; stripping that suffix
# leaves the bin label.
coef_data_ideology <- tidy(event_study_reg_ideology) %>%
  filter(grepl("TimeBin", term)) %>%
  mutate(
    ideology = case_when(
      grepl("leftist", term) ~ "Leftist",
      grepl("centrist", term) ~ "Centrist",
      TRUE ~ "Rightist"
    ),
    term = sub(":ideology::.*$", "", sub("^TimeBin::", "", term)),
    term = factor(term, levels = term_levels, ordered = TRUE)
  )

# Add the reference point (0 for 0-1 year before), one row per ideology
reference_rows <- tibble(
  term = factor("0-1 year before", levels = term_levels, ordered = TRUE),
  ideology = unique(coef_data_ideology$ideology),
  estimate = 0,
  std.error = 0
)

# Combine the datasets
coef_data_ideology <- bind_rows(coef_data_ideology, reference_rows) %>%
  arrange(ideology, term)

# Factor version of ideology with an explicit level order (the panels below
# are faceted on the character column `ideology`)
coef_data_ideology <- coef_data_ideology %>%
  mutate(Ideology = factor(ideology, levels = c("Centrist", "Leftist", "Rightist"),
                           labels = c("Centrist", "Leftist", "Rightist")))

# Axis labels, named by bin so each label stays attached to its own bin even
# if a bin is absent from the estimates
new_labels <- setNames(
  c("-3 years", "-2 years", "-1 year", "Last Year Before Mention",
    "First Year With Mention", "+2 years", "+3 years", "Period Beyond"),
  term_levels
)

# Now, let's create the plot: one panel per ideology
event_study_plot_ideology <- ggplot(coef_data_ideology, aes(x = term, y = estimate, group = 1)) +
  geom_line(size = 0.5) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = estimate - 1.96 * std.error,
                    ymax = estimate + 1.96 * std.error),
                width = 0.2, size = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  geom_vline(xintercept = "0-1 year before", linetype = "dashed", color = "gray") +
  facet_wrap(~ ideology, ncol = 1) +
  labs(x = "Time Relative to First Populist Mention",
       y = "Estimated Effect on Sentiment") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    panel.grid.minor = element_blank(),
    axis.title = element_text(size= 10),
    strip.text = element_text(face = "bold", size = 10)
  ) +
  scale_x_discrete(labels = new_labels) +
  coord_cartesian(ylim = c(-1, 1))

# Display the plot
print(event_study_plot_ideology)

# Save the plot (passed explicitly rather than relying on last_plot())
ggsave("event_study_plot_faceted.png", event_study_plot_ideology, width = 15, height = 6, dpi = 300)

# Create the combined LaTeX table for the two event-study regressions
combined_event_study_table <- etable(event_study_reg, event_study_reg_ideology,
                                     title = "Event Study Results: Sentiment Change Around First Populist Mention",
                                     label = "tab:combined_event_study",
                                     tex = TRUE,
                                     style.tex = style.tex(main = "aer"),
                                     headers = c("Without Ideology", "With Ideology"),
                                     notes = "Standard errors in parentheses. * p<0.1, ** p<0.05, *** p<0.01",
                                     dict = c("More than 3 years before" = "More than 3 years before",
                                              "2-3 years before" = "2-3 years before",
                                              "1-2 years before" = "1-2 years before",
                                              "0-1 year before" = "0-1 year before (ref)",
                                              "0-1 year after" = "0-1 year after",
                                              "1-2 years after" = "1-2 years after",
                                              "2-3 years after" = "2-3 years after",
                                              "More than 3 years after" = "More than 3 years after",
                                              "ideology" = "Ideology",
                                              "PopulistPopulist" = "Populist"))

# Save the LaTeX code to a file
writeLines(combined_event_study_table, "combined_event_study_results.tex")

# Print the LaTeX code to the console, one table line per console line
# (cat() joins vector elements with spaces unless told otherwise)
cat(combined_event_study_table, sep = "\n")


### ------------------ TWO-WAY FIXED EFFECTS REGRESSION RESULTS  (Table 4, Figure 8, and Figure 9) ------------------ ###

# Sample restricted to edits with a known ideology (drops NA hog_ideology)
all_edits_scores_ideo <- all_edits_scores_content %>%
  filter(hog_ideology %in% c("leftist", "rightist", "centrist"))

# Table 4: Regression Results - All Models

# (1) Pooled OLS with ideology controls
reg_1 <- lm(sentiment ~ Populist + hog_ideology, data = all_edits_scores_ideo)

# (2) Year fixed effects
reg_2 <- feols(
  sentiment ~ Populist | factor(Year),
  data = all_edits_scores_content
)

# (3) Year and country fixed effects
reg_3 <- feols(
  sentiment ~ Populist | factor(Year) + country_name,
  data = all_edits_scores_content
)

# (4) Populist interacted with year dummies, country fixed effects
reg_4 <- feols(
  sentiment ~ Populist*factor(Year) | country_name,
  data = all_edits_scores_content
)

# (5) Ideology controls with year and country fixed effects
reg_5 <- feols(
  sentiment ~ Populist + hog_ideology | factor(Year) + country_name,
  data = all_edits_scores_ideo
)

# (6) Populist interacted with ideology, year and country fixed effects
reg_6 <- feols(
  sentiment ~ Populist * hog_ideology | factor(Year) + country_name,
  data = all_edits_scores_ideo
)

# Print every model summary in order
invisible(lapply(
  list(reg_1, reg_2, reg_3, reg_4, reg_5, reg_6),
  function(model) print(summary(model))
))


# Table with all models
all_models <- list(
  "OLS (1)" = reg_1,
  "OLS (2)" = reg_2,
  "FE (3)" = reg_3,
  "FE (4)" = reg_4,
  "FE (5)" = reg_5,
  "FE (6)" = reg_6
)

# Fixed-effects indicator rows, matching the model formulas:
# reg_2 absorbs factor(Year); reg_3, reg_5 and reg_6 absorb factor(Year) and
# country_name; reg_4 absorbs country_name and carries year dummies as
# regressors.
add_rows <- tribble(
  ~term,          ~"OLS (1)", ~"OLS (2)", ~"FE (3)", ~"FE (4)", ~"FE (5)", ~"FE (6)",
  "Year FE",      "No",       "Yes",      "Yes",     "Yes",     "Yes",     "Yes",
  "Country FE",   "No",       "No",       "Yes",     "Yes",     "Yes",     "Yes"
)

# Star thresholds are given explicitly so they agree with the note below;
# `stars = TRUE` would apply modelsummary's own default thresholds instead.
# The centrist interaction label now sits inside coef_map (it used to follow
# the closing parenthesis and was passed as a stray extra argument).
latex_table <- modelsummary(all_models,
                            title = "Regression Results - All Models",
                            stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
                            gof_map = c("nobs", "r.squared", "adj.r.squared"),
                            coef_map = c("(Intercept)" = "Intercept",
                                         "PopulistPopulist" = "Populist",
                                         "hog_ideologyleftist" = "Ideology (Left)",
                                         "hog_ideologyrightist" = "Ideology (Right)",
                                         "hog_ideologycentrist" = "Ideology (Center)",
                                         "PopulistPopulist:hog_ideologyleftist" = "Populist × Ideology (Left)",
                                         "PopulistPopulist:hog_ideologyrightist" = "Populist × Ideology (Right)",
                                         "PopulistPopulist:hog_ideologycentrist" = "Populist x Ideology (Center)"),
                            add_rows = add_rows,
                            notes = "Standard errors in parentheses. * p<0.1, ** p<0.05, *** p<0.01",
                            output = "table_all_models.tex")



# Table with only FE models
fe_models <- list(
  "FE (3)" = reg_3,
  "FE (4)" = reg_4,
  "FE (5)" = reg_5,
  "FE (6)" = reg_6
)

# Fixed-effects indicator rows (FE models only). reg_3 absorbs both
# factor(Year) and country_name, so it is marked "Yes" for country FE as well.
add_rows_fe <- tribble(
  ~term,          ~"FE (3)", ~"FE (4)", ~"FE (5)", ~"FE (6)",
  "Year FE",      "Yes",     "Yes",     "Yes",     "Yes",
  "Country FE",   "Yes",     "Yes",     "Yes",     "Yes"
)

# Star thresholds are given explicitly so they agree with the note below;
# `stars = TRUE` would apply modelsummary's own default thresholds instead.
modelsummary(fe_models,
             title = "Regression Results - Fixed Effects Models",
             stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
             gof_map = c("nobs", "r.squared", "adj.r.squared"),
             coef_map = c("PopulistPopulist" = "Populist",
                          "hog_ideologyleftist" = "Ideology (Left)",
                          "hog_ideologyrightist" = "Ideology (Right)",
                          "hog_ideologycentrist" = "Ideology (Center)",
                          "PopulistPopulist:hog_ideologyleftist" = "Populist × Ideology (Left)",
                          "PopulistPopulist:hog_ideologyrightist" = "Populist × Ideology (Right)",
                          "PopulistPopulist:hog_ideologycentrist" = "Populist x Ideology (Center)"
             ),
             add_rows = add_rows_fe,
             notes = "Standard errors in parentheses. * p<0.1, ** p<0.05, *** p<0.01",
             output = "table_fe_models.tex")



### Figure 8: Sentiment Differences of Heads of Government Over Time

# Extract the year coefficients of reg_4: year dummies ("Non-Populist") and
# their interactions with the Populist indicator ("Populist")
coef_data <- tidy(reg_4) %>%
  filter(grepl("factor\\(Year\\)", term)) %>%
  mutate(
    # The leading ".*" also consumes any prefix such as "PopulistPopulist:";
    # without it the prefix survives and as.numeric() turns every interaction
    # term into NA
    Year = as.numeric(sub(".*factor\\(Year\\)(\\d+).*", "\\1", term)),
    Group = case_when(
      grepl("PopulistPopulist", term) ~ "Populist",
      !grepl("Populist", term) ~ "Non-Populist",
      TRUE ~ NA_character_
    ),
    Estimate = estimate,
    SE = std.error,
    CI_lower = estimate - 1.96 * std.error,
    CI_upper = estimate + 1.96 * std.error
  ) %>%
  filter(!is.na(Group))

# Print the coefficient data to check
print(coef_data)

# Colour by Group so the two coefficient series can be told apart and the
# dodge has a grouping to act on
sentiment_year_plot <- ggplot(coef_data, aes(x = Year, y = Estimate, color = Group)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper),
                width = 0.2,
                position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("Non-Populist" = "steelblue", "Populist" = "firebrick")) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  ) +
  coord_cartesian(ylim = c(-1, 1)) +
  labs(#title = "Difference in Sentiment Between Populist and Non-Populist Leaders",
    x = "Year",
    y = "Estimated Difference in Sentiment",
    color = "Leader Type")

# Display the plot
print(sentiment_year_plot)

# Save the plot (passed explicitly rather than relying on last_plot())
ggsave("sentiment_populist_vs_nonpopulist.pdf", sentiment_year_plot, width = 12, height = 6)


### Figure 9: Sentiment Analyses of Heads of Government in Wikipedia - Populism Conditional on Ideology

# Coefficients and standard errors entered by hand.
# NOTE(review): presumably copied from the Populist x ideology model output --
# verify against the current estimates before publishing.
coef_data <- data.frame(
  term = c(
    "PopulistPopulist",
    "PopulistPopulist:hog_ideologyleftist",
    "PopulistPopulist:hog_ideologycentrist",
    "hog_ideologyleftist",
    "hog_ideologycentrist"
  ),
  estimate = c(0.506, -0.359, 0.1, 0.108, 0.05),
  std.error = c(0.098, 0.159, 0.1, 0.017, 0.015)
)

# Display category for each coefficient name
category_by_term <- c(
  "PopulistPopulist" = "Right Populist",
  "PopulistPopulist:hog_ideologyleftist" = "Left Populist",
  "PopulistPopulist:hog_ideologycentrist" = "Center Populist",
  "hog_ideologyleftist" = "Left Non-Populist",
  "hog_ideologycentrist" = "Center Non-Populist"
)

# Attach categories and 95% confidence bounds
plot_data <- coef_data %>%
  mutate(
    category = unname(category_by_term[as.character(term)]),
    ci_lower = estimate - 1.96 * std.error,
    ci_upper = estimate + 1.96 * std.error
  )

# Append the omitted baseline category as an exact zero
baseline_row <- data.frame(
  term = "Reference",
  estimate = 0,
  std.error = 0,
  category = "Right Non-Populist",
  ci_lower = 0,
  ci_upper = 0
)
plot_data <- rbind(plot_data, baseline_row)

# Fill colour per category
category_colours <- c(
  "Left Populist" = "firebrick",
  "Right Populist" = "darkred",
  "Center Populist" = "indianred",
  "Left Non-Populist" = "navy",
  "Right Non-Populist" = "royalblue",
  "Center Non-Populist" = "steelblue"
)

# Bar chart of the coefficients with error bars and value labels; labels sit
# above non-negative bars and below negative ones
ggplot(plot_data, aes(x = category, y = estimate, fill = category)) +
  geom_col(position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = position_dodge(width = 0.9),
    width = 0.2
  ) +
  geom_text(
    aes(label = sprintf("%.2f", estimate)),
    position = position_dodge(width = 0.9),
    vjust = ifelse(plot_data$estimate >= 0, -0.5, 1.5),
    size = 4
  ) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.5) +
  scale_fill_manual(values = category_colours) +
  coord_cartesian(ylim = c(-1, 1)) +
  labs(
    x = "Category",
    y = "Coefficient Estimate",
    fill = "Category"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(last_plot())

ggsave("ideology-populism-coefficients.pdf", width = 12, height = 6)



### ------------------ APPENDIX ------------------ ###

# This section has the code for figures in the Appendix unrelated to regression results (which are coded above)

### Figure 11: Trends in Populism mentions on the Guardian and Google

# evolution of populism on the guardian 

#' Count Guardian articles matching a query within a date range
#'
#' Sends a search request to the Guardian content API and returns the `total`
#' field of the response. Failed attempts (request errors or non-200 status
#' codes) are reported on the console and retried after a 5 second pause.
#'
#' httr and jsonlite are called through their namespaces because neither is
#' attached by the script's library block.
#'
#' @param query Search string sent as the `q` parameter.
#' @param api_key Guardian API key.
#' @param from_date,to_date Range bounds sent as `from-date` and `to-date`.
#' @param max_retries Maximum number of attempts.
#' @return The `response$total` value of the parsed JSON reply.
#' @details Stops with an error when no attempt returns status 200.
search_guardian <- function(query, api_key, from_date, to_date, max_retries = 3) {
  base_url <- "https://content.guardianapis.com/search"

  params <- list(
    "api-key" = api_key,
    "q" = query,
    "from-date" = from_date,
    "to-date" = to_date,
    "page-size" = 1 # only the total is read, so one result per page is enough
  )

  for (attempt in seq_len(max_retries)) {
    response <- tryCatch(
      httr::GET(base_url, query = params),
      error = function(e) {
        cat("Error on attempt", attempt, ":", conditionMessage(e), "\n")
        NULL
      }
    )

    if (!is.null(response)) {
      status <- httr::status_code(response)
      if (status == 200) {
        body <- httr::content(response, "text", encoding = "UTF-8")
        parsed <- jsonlite::fromJSON(body)
        return(parsed$response$total)
      }
      cat("Error: Status code", status, "\n")
    }

    if (attempt < max_retries) {
      cat("Retrying in 5 seconds...\n")
      Sys.sleep(5)
    }
  }

  # stop() concatenates its arguments without separators, so the spaces are
  # part of the message pieces
  stop("Failed to get a valid response after ", max_retries, " attempts",
       call. = FALSE)
}

# Guardian API key: taken from the GUARDIAN_API_KEY environment variable when
# it is set, otherwise the key shipped with the replication files is used.
# NOTE(review): a key committed to source is effectively public -- consider
# rotating it and dropping the fallback.
api_key <- Sys.getenv("GUARDIAN_API_KEY", unset = "cfb4e0be-f6e3-457d-86f1-17a26587fbfe")

# 2000 up to the current calendar year (base R, no date package needed)
years <- 2000:as.integer(format(Sys.Date(), "%Y"))

# Query one year at a time and collect one row per year. The rows are built
# in a list and bound once instead of growing a data frame inside a loop, and
# the loop variable no longer reuses the name of the year() function.
yearly_counts <- lapply(years, function(yr) {
  from_date <- paste0(yr, "-01-01")
  to_date <- paste0(yr, "-12-31")

  cat("Processing year:", yr, "\n")

  populism_count <- search_guardian("populism OR populist", api_key, from_date, to_date)
  cat("Populism count:", populism_count, "\n")

  total_count <- search_guardian("", api_key, from_date, to_date)
  cat("Total count:", total_count, "\n")

  # Share of matching articles; 0 when the year has no articles at all
  ratio <- if (total_count > 0) populism_count / total_count else 0

  cat("Ratio:", ratio, "\n\n")
  Sys.sleep(2)  # Delay to avoid rate limits

  data.frame(year = yr, populism_count = populism_count, total_count = total_count, ratio = ratio)
})

results <- do.call(rbind, yearly_counts)

# Save the results to a CSV file
write.csv(results, file = "guardian_populism_results.csv", row.names = FALSE)

results_2004_onwards <- results[results$year >= 2004, ]


# Create the plot
guardian <- ggplot(results_2004_onwards, aes(x = year, y = ratio)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  labs(title = "Mentions of 'Populism/Populist' in The Guardian (2004-2024)",
       subtitle = "Normalized by total number of articles (Ratio)",
       x = "Year",
       y = "") +
  scale_x_continuous(breaks = seq(2004, max(years), by = 2)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Save the plot (ggsave() does not create missing folders)
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
ggsave("plots/guardian_guardian.pdf", plot = guardian, width = 10, height = 6)


# adding google
# The first line of the export is skipped and the columns are named here
d <- read.csv("data/multiTimeline.csv", skip = 1, header = FALSE, col.names = c("month", "index"), stringsAsFactors = FALSE)

# Remove the first row
d <- d[-1, ]

# Convert month to Date type and index to numeric
d$month <- as.Date(paste0(d$month, "-01"))
d$index <- as.numeric(d$index)

# Extract year from date (base R, no date package needed) and calculate
# yearly average
d_yearly <- d %>%
  mutate(year = as.numeric(format(month, "%Y"))) %>%
  group_by(year) %>%
  summarize(avg_index = mean(index, na.rm = TRUE))

# Create the plot
google_trends <- ggplot(d_yearly, aes(x = year, y = avg_index)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  labs(title = "Google Trends: Populism (2004-2024)",
       subtitle = "Yearly average trend index",
       x = "Year",
       y = "") +
  scale_x_continuous(breaks = seq(min(d_yearly$year), max(d_yearly$year), by = 2)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ggsave() does not create missing folders
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
ggsave("plots/google_trends.pdf", plot = google_trends, width = 10, height = 6)

# Stack the two trend plots. An earlier patchwork-based `combined_plot` was
# immediately overwritten by this call (and needed a package the script never
# attaches), so only the gridExtra version that is actually saved is kept.
combined_plot <- gridExtra::grid.arrange(google_trends, guardian, ncol = 1)

ggsave("plots/trends_in_populism.pdf", combined_plot, width = 7, height = 5)


### Figure 12: Average Number of Edits in Wikipedia pages with 95% Confidence Intervals


# Edit counts per head-of-government page
edits <- read.csv("edit_counts_hog.csv")

# Leaders treated as populists (names must match the `Leader` column exactly)
populists <- c(
  "Alvaro Enrique Arzu Irigoyen",
  "Maria Estela Martinez Cartas de Peron",
  "Juan Perón",
  "Carlos Menem",
  "Néstor Kirchner",
  "Cristina Fernandez de Kirchner",
  "Víctor Paz Estenssoro",
  "Hernan Siles Zuazo",
  "Juan Evo Morales Ayma",
  "Getulio Dorneles Vargas",
  "Fernando Collor",
  "Jair Messias Bolsonaro",
  "Boyko Metodiev Borisov",
  "Arturo Alessandri", # out of sample, pre 1945
  "Carlos Ibanez del Campo",
  "Jose Maria Velasco Ibarra",
  "Abdala Jaime Bucaram Ortiz",
  "Rafael Vicente Correa Delgado",
  "Adolf Hitler", # out of sample, 'pre' 1945
  "Alexis Tsipras",
  "Viktor Orban",
  "Indira Priyadarshini Gandhi",
  "Narendra Modi",
  "Sukarno",
  "Joko Widodo",
  "Benjamin Netanyahu",
  "Benito Mussolini", # out of sample, pre 1945
  "Silvio Berlusconi",
  "Junichiro Koizumi",
  "Lazaro Cardenas", # out of sample, pre 1945
  "Miguel Angel Rodriguez Echeverria",
  "Andres Manuel Lopez Obrador",
  "Robert David Muldoon",
  "Alan García",
  "Alberto Fujimori",
  "Joseph Estrada",
  "Rodrigo Roa Duterte",
  "Jaroslaw Aleksander Kaczynski",
  "Vladimir Meciar",
  "Robert Fico",
  "Jacob Gedleyihlekisa Zuma",
  "Roh Moo-hyun",
  "Chen Shui-bian", # out of the sample, it uses premiers and not presidents
  "Thaksin Shinawatra",
  "Recep Tayyip Erdogan",
  "Boris Johnson",
  "Donald John Trump",
  "Hugo Rafael Chavez Frias",
  "Nicolas Maduro",
  "Tsai Ing-wen"
)

# 1 when the page's leader is in the list above, 0 otherwise
edits$populist <- as.numeric(edits$Leader %in% populists)

# Mean number of edits per group with a t-based 95% confidence interval.
# The standard error and degrees of freedom use the number of non-missing
# Total_Edits values (n_obs), matching the observations that enter the mean
# and standard deviation; n() would also count rows with a missing value.
edit_summary <- edits %>%
  filter(!is.na(populist)) %>%
  group_by(populist) %>%
  summarise(
    n_obs = sum(!is.na(Total_Edits)),
    overall_avg_edits = mean(Total_Edits, na.rm = TRUE),
    se = sd(Total_Edits, na.rm = TRUE) / sqrt(n_obs),
    lower_ci = overall_avg_edits - qt(1 - 0.05 / 2, n_obs - 1) * se,
    upper_ci = overall_avg_edits + qt(1 - 0.05 / 2, n_obs - 1) * se
  )

# Plot average edits for populist and non-populist heads of government
ggplot(edit_summary, aes(x = factor(populist, labels = c("Non-Populists", "Populists")), y = overall_avg_edits)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.2) +
  theme_minimal() +
  labs(
    y = "Average Edits",
    x = "Funke et al. (2023) Heads of Government Classification"
  ) +
  theme(
    axis.title.x = element_text(size = 14),  # Increase the x-axis title size
    axis.title.y = element_text(size = 14),  # Increase the y-axis title size
    axis.text.x = element_text(size = 12),   # Increase the x-axis text size
    axis.text.y = element_text(size = 12)    # Increase the y-axis text size
  ) +
  scale_y_continuous(
    # Start at 0 unless a lower bound is negative: an error bar reaching
    # outside the scale limits would be dropped from the plot
    limits = c(min(0, edit_summary$lower_ci), max(edit_summary$upper_ci) * 1.1),
    expand = c(0, 0)  # Remove padding below the x-axis
  )
